{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Regression Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as sm\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Get the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'Sales in Several UK Regions')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADt0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjByYzIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy/EUOrgAAAgAElEQVR4nO3dfZQcVZ3/8feHYSADBMeQCGQCJKwSFRIMjjyIPwjgEh4lAmpYFlRWMTys7q5EiT9X0KPgGs9ZFJQsgjwoBFlMRheBICAqsKBDBhIU4kEIMhP9EYLDkwMm4/f3R1WHnk73pCeZ6u6Z+rzO6ZPqW7eqv11T6W9X3dv3KiIwM7P82qreAZiZWX05EZiZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeWcE4FtEUmrJL13mPf5G0kzh3OfjUTSZEkhaet6x7KlRvvfKi+cCAxJ75F0v6QXJD0v6T5J76pXPBGxd0TcsznbSjpB0sOSXpT0nKS7JE0e1gAzliaJN5eUXSjp++nyTEndReu2kbQ4/bvtWGZ/10j6q6SX07/vTyW9dThi3ZK/lTUOJ4KcSz84bgEuBcYBbcAXgdfqGdfmSD88rwM+DbwBmAJ8G/hbjeNoquFrbQssBlqBIyPixQpVvxYRO5D8fXuAq2oUoo0ATgS2F0BELIqI/ojoi4g7ImI5gKS/k3S3pLXpN+zrJbWW25GkrSSdL+n3af2bJI1L142R9P20vFfSryXtXGE/G243pd+Eb5J0naSX0lsR7RXeyzuApyLirki8FBE/jIg/VBHf7ZLOLYnjEUknpstvTb9JPy9ppaQPFtW7RtLlkm6V9ApwmKRjJXWlVybPSLqw2j9ItSRtB/wP0AwcGxGvbGqbiOgDbiI5VoX9TJT0Q0lrJD0l6ZNF61okXSvpz5Iek/SZkquR4r/VtpIukbQ6fVySJqoNVzGSPi3pWUl/lPTRov0cI+m36d+4R9J5w3CIrEpOBPY7oD/9z360pDeWrBdwMTAReBuwG3BhhX19EpgNHJrW/zPwrXTdh0m+pe8G7ATMBfqqjPF9wI0k33p/DFxWod4y4K2S/lPSYZJ2GEJ8NwCnFCpKejuwB/ATSdsDP03rvCmt921Jexft+x+ArwBjgXuBV4DT05iPBc6SNLvK91uNbYHbgFeB96Uf8JuUvpdTgCfS51uRJJNHSK4WjgD+RdKsdJMLgMnAnsDfA/84yO7/L3AgSZLZF9gf+HzR+l1IzoE24J+AbxWdb1cBn4iIscA+wN3VvB8bJhHhR84fJB/w1wDdwHqSD9udK9SdDXQVPV8FvDddfgw4omjdrsA6YGvgDOB+YHoV8RTv80LgzqJ1bwf6Btn2QJJvvGtIPiSvAXaoIr6xJB/ee6TrvgJ8N13+EPDLktf5L+CCdPka4LpNvKdLgP9MlycDAWxdoW4Aby4puxD4fro8M31vfwVOquJ4XpPW7yW5TfZU4e8AHAD8oaT+fODqdPlJYFbRuo8B3RX+Vr8HjilaNwtYVRRzX/F7Bp4FDkyX/wB8Atix3v8f8vjwFYEREY9FxEciYhLJt7GJJB9cSHqTpBvTy/UXge8D4yvsag9gSXrrp5fkg7cf2Bn4HrAUuDG9bfA1Sc1VhvinouW/AGNUocdNRDwQER+MiAnA/wEOIfmmOmh8EfES8BNgTlp3DnB90XYHFLZLtz2V5BtuwTPFcUg6QNLP0tstL5BcAVU6bqX6SW73FGsmSVoFz6UxXlv07X0wX4+IVpIk1AdMTcv3ACaWvLfPkfzNIDkXit/bgPdZYiLwdNHzp9OygrURsb7o+V+AwlXbScAxwNOSfi7poCrekw0TJwIbICIeJ/kGuU9adDHJN9TpEbEjya0BVdj8GeDoiGgteoyJiJ6IWBcRX4yItwPvBo4juXWS5Xv5NUlDauG9VIwvXb8IOCX9EGoBfla03c9LttshIs4qfrmSl7+B5Mpqt4h4A7CQyset1B9IPrCLTWHghywRsRj4OHCzpMOq2XEk7SWfAr4hqYXkvT1V8t7GRsQx6SZ/BCYV7WK3QXa/miSxFOyellUT168j4gSSW28dJFd1ViNOBDmXNoJ+WtKk9PluJPeQH0irjAVeBnoltQHzBtndQuArkvZI9zVB0gnp8mGSpinpUfMiybfb/mF+L++R9HFJbyq8N5L2hcJ7qRhf6laSD7IvAT+IiEJvo1uAvSSdJqk5fbxL0tsGCWcs8HxEvCppf5I2hGr9APi8pElKGrjfCxwP3FxaMSIWAecCP5J0cDU7j4ifknxAnwn8CnhR0mfThuEmSfvo9e7DNwHzJb0x/fufW2G3kCTSz6fHdTzwBZIryEEp6f56qqQ3RMQ6kvNjWM8NG5wTgb1Ecp/4wbTHywPAoyRdMCHpSrof8ALJrZPFg+zrGyTfgu+Q9FK6rwPSdbuQfJC9SHJL5udU8SExRL0kH/wrJL0M3A4sAb5WRXxExGsk7++9JN/oC+UvAUeS3IpZTXKr6j9IGmwrORv4Uvo6X2Bo33C/RNKeci9Jg/bXgFMj4tFylSPiWpK/10/SpFONBcBnSNpHjiftcUVyy+lKkkbdQizd6bo7Sf6GlboWfxnoBJYDK0ga779cZTynAavS249zGbxR2oaZIjwxjZlVR9JZwJyIOLTesdjw8RWBmVUkaVdJB6e3qKaSXHksqXdcNrxG/FgnZpapbUi6yk4hufV2I8mvtW0U8a0hM7Oc860hM7OcG3G3hsaPHx+TJ0+udxhmZiPKQw899Fz6Q8uNjLhEMHnyZDo7O+sdhpnZiCLp6UrrfGvIzCznnAjMzHLOicDMLOcybSNQMoHJlSSDfgVwRkT8b9H6mcCPSH6+DrA4Ir6UZUxmNvKtW7eO7u5uXn311XqH0nDGjBnDpEmTaG6udnDf7BuLvwHcHhEnS9oG2K5MnV9GxHEZx2Fmo0h3dzdjx45l8uTJSNUO6jr6RQRr166lu7ubKVOmVL1dZolAyVy4hwAfAYiIv5JMpGFmo1BHVw8Llq5kdW8fE1tbmDdrKrNntGXyWq+++qqTQBmS2GmnnVizZs2QtsuyjWBPklmirlYyd+uV6TR5pQ5SMjfsbSVT/20g6UxJnZI6h/oGzSx7HV09zF+8gp7ePgLo6e1j/uIVdHT1bHLbzeUkUN7mHJcsE8HWJMMXXx4RM0imATy/pM4ykqkB9wUuJZmQYiMRcUVEtEdE+4QJZX8PYWZ1tGDpSvrWDZxCoG9dPwuWrqxTRDYUWSaCbpK5TR9Mn99Mkhg2iIgXI+LldPlWoDmd0MLMRpDVvX1DKh8tlixZgiQef/xxAFatWsU+++yzia3Kmzx5Ms8991zV9a+55hrOPXeweYKql1kiiIg/Ac+kQ9cCHAH8triOpF2UXsekE2psBazNKiYzy8bE1pYhlddaR1cPB3/1bqac/xMO/urdw3bLatGiRbznPe/hxhtvHJb91UvWvyP4Z+B6SctJZkC6SNJcSXPT9ScDj0p6BPgmyYQXHg7VbISZN2sqLc1NA8pampuYN2tqhS1qJ6v2i5dffpn77ruPq666qmwi6O/v57zzzmPatGlMnz6dSy+9FIC77rqLGTNmMG3aNM444wxee+31Cd8uvfRS9ttvP6ZNm7bhKuP5559n9uzZTJ8+nQMPPJDly5dvUdzlZJoIIuLh9N7+9IiYHRF/joiFEbEwXX9ZROwdEftGxIERcX+W8ZhZNmbPaOPiE6fR1tqCgLbWFi4+cVpmvYaGIqv2i46ODo466ij22msvxo0bx7Jlywasv+KKK3jqqafo6upi+fLlnHrqqbz66qt85CMf4Qc/+AErVqxg/fr1XH755Ru2GT9+PMuWLeOss87i61//OgAXXHABM2bMYPny5Vx00UWcfvrpWxR3Of5lsZkNi9kz2rjv/MN56qvHct/5hzdEEoDs2i8WLVrEnDlzAJgzZw6LFi0asP7OO+9k7ty5bL110kt/3LhxrFy5kilTprDXXnsB8OEPf5hf/OIXG7Y58cQTAXjnO9/JqlWrALj33ns57bTTADj88MNZu3YtL7zwwhbFXmrEjT5qZjYUE1tb6Cnzob8l7Rdr167l7rvv5tFHH0US/f39SOLss8/eUCciNurKuak739tuuy0ATU1NrF+/vuI2w9111lcEZjaqZdF+cfPNN3P66afz9NNPs2rVKp555hmmTJlCd3f3hjpHHnkkCxcu3PCB/vzzz/PWt76VVatW8cQTTwDwve99j0MPPXTQ1zrkkEO4/vrrAbjnnnsYP348O+6442bHXo4TgZmNalm0XyxatIj3v//9A8pOOukkLrroog3PP/axj7H77rszffp09t13X2644QbGjBnD1VdfzQc+8AGmTZvGVlttxdy5c0t3P8CFF15IZ2cn06dP5/zzz+faa6/d7LgrGXFzFre3t4cnpjHLt8cee4y3ve1t9Q6jYZU7PpIeioj2cvV9RWBmlnNOBGZmOedEYGYj0ki7rV0rm3NcnAjMbMQZM2YMa9eudTIoUZiPYMyYMUPazr8jMLMRZ9KkSXR3dw953P08KMxQNhROBGY24jQ3Nw9pBi4bnG8NmZnlnBOBmVnOORGYmeWcE4GZWc45EZiZ5Zx7DZk1sI6uHhYsXcnq3j4mtrYwb9bUhhnn30YPJwKzBlWYYrEwu1ZhikXAycCGlW8NmTWorKZYNCvlRGDWoLKaYtGslBOBWYOqNJXilkyxaFaOE4FZA+ro6uGV19ZvVL6lUyyalePGYrMGU9pIXPDG7Zq54Pi93VBsw85XBGYNplwjMcB222ztJGCZcCIwazBuJLZa860hswYzsbWFnjIf+m4kHr3q/cNBXxGYNZh5s6bS0tw0oMyNxKNXoU2op7eP4PUfDnZ09dQsBicCswYze0YbF584jbbWFgS0tbZw8YnT3D4wSjXCDwd9a8isAc2e0eYP/pxohDYhXxGYmdVRI/xw0InAzKyOGqFNKNNEIKlV0s2SHpf0mKSDStZL0jclPSFpuaT9sozHzKzRNEKbUNZtBN8Abo+IkyVtA2xXsv5o4C3p4wDg8vRfM7PcqHebUGZXBJJ2BA4BrgKIiL9GRG9JtROA6yLxANAqadesYjIzs41leWtoT2ANcLWkLklXStq+pE4b8EzR8+60bABJZ0rqlNS5Zs2a7CI2M8uhLBPB1sB+wOURMQN4BTi/pI7KbBcbFURcERHtEdE+YcKE4Y/UzCzHskwE3UB3RDyYPr+ZJDGU1tmt6PkkYHWGMZmZWYnMEkFE/Al4RlKhD9QRwG9Lqv0YOD3tPXQg8EJE/DGrmMzMbGNZ9xr6Z+D6tMfQk8BHJc0FiIiFwK3AMcATwF+Aj2Ycj5mZlcg0EUTEw0B7SfHCovUBnJNlDGZmNjiPNWRVqfcwuWaWHScC26TSqRMLw+QCTgZmo4DHGrJNaoRhcs0sO04EtkmNMEyumWXHicA2qRGGyTWz7DgR2CY1wjC5ZpYdNxbbJhUahN1ryGx0ciKwqtR7mFwzy45vDZmZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeWcE4GZWc45EZiZ5ZwTgZlZzjkRmJnlnBOBmVnOORGYmeWcE4GZWc45EZiZ5ZxHH7VRqaOrx8Nmm1XJicBGnY6uHuYvXrFhnuWe3j7mL14B4GRgVoZvDdmos2Dpyg1JoKBvXT8Llq6sU0Rmjc2JwEad1b19Qyo3yzsnAht1Jra2DKncLO+cCGzUmTdrKi3NTQPKWpqbmDdrap0iMmtsbiy2UafQIOxeQ2bVcSKwUWn2jDZ/8JtVybeGzMxyzonAzCznMr01JGkV8BLQD6yPiPaS9TOBHwFPpUWLI+JLWcZkZmYD1aKN4LCIeG6Q9b+MiONqEIeZmZXhW0NmZjmXdSII4A5JD0k6s0KdgyQ9Iuk2SXtnHI+ZmZXI+tbQwRGxWtKbgJ9KejwiflG0fhmwR0S8LOkYoAN4S+lO0iRyJsDuu++ecchmZvmS6RVBRKxO/30WWALsX7L+xYh4OV2+FWiWNL7Mfq6IiPaIaJ8wYUKWIZuZ5U5miUDS9pLGFpaBI4FHS+rsIknp8v5pPGuzisnMzDaW5a2hnYEl6ef81sANEXG7pLkAEbEQOBk4S9J6oA+YExGRYUxmZlYis0QQEU8C+5YpX1i0fBlwWVYxmJnZplVMBJIuJen1U1ZEfDKTiMzMrKYGuyLorFkUZmZWNxUTQURcW/w8bfiNQi8fMzMbHTbZa0jSPpK6SHr8/Db9cZh/+GVmNkpU0330CuDfImKPiNgd+DTwnWzDMjOzWqkmEWwfET8rPImIe4DtM4vIzMxqqpruo09K+nfge+nzf+T1YaPNzGyEq+aK4AxgArCYZJiICcBHswzKzMxqZ5NXBBHxZ8C/GTAzG6U2mQgk7QWcB0wurh8Rh2cXlpmZ1Uo1bQT/DSwEriSZctJs1Ojo6mHB0pWs7u1jYmsL82ZNZfaMtnqHZVZT1SSC9RFxeeaRmNVYR1cP8xevoG9d8v2mp7eP+YtXADgZWK5UbCyWNE7SOOB/JJ0taddCWVpuNqItWLpyQxIo6FvXz4KlK+sUkVl9DHZF8BDJoHNKn88rWhfAnlkFZVYLq3v7hlRuNloNNtbQlFoGYlZrE1tb6CnzoT+xtaUO0ZjVTzVjDTVL+qSkm9PHuZKaaxGcWZbmzZpKS3PTgLKW5ibmzZpap4jM6qOaxuLLgWbg2+nz09Kyj2UVlFktFBqE3WvI8q6aRPCuiCieaexuSY9kFZBZLc2e0eYPfsu9aoaY6Jf0d4UnkvbEvycwMxs1qrkimAf8TNKTJD2I9sBjDZmZjRrVjDV0l6S3AFNJEsHjEfFa5pGZmVlNVNNr6BygJSKWR8QjwHaSzs4+NDMzq4Vq2gg+HhG9hSfpaKQfzy4kMzOrpWoSwVaSCr8uRlITsE12IZmZWS1V01i8FLhJ0kKSoSXmArdnGpWZmdVMNYngs8AngLNIGovvIBmS2szMRoFqeg39jeSXxB6K2sxsFKqYCCStILkVVE6U/NrYzMxGqMGuCI4rUyZgEvC5bMIxM7NaG2wY6qcLy5LeAfwD8EHgKeCH2YdmZma1MNitob2AOcApwFrgB4Ai4rAaxWZmZjUw2K2hx4FfAsdHxBMAkv61JlGZmVnNDPaDspOAP5EMOPcdSUfw+rSVVZG0StIKSQ9L6iyzXpK+KekJScsl7Te08M3MbEsN1kawBFgiaXtgNvCvwM6SLgeWRMQdVb7GYRHxXIV1RwNvSR8HkHRRPaDa4M3MbMttcoiJiHglIq6PiONIegw9DJw/TK9/AnBdJB4AWiXtOkz7NjOzKlQz1tAGEfF8RPxXRBxe7SbAHZIeknRmmfVtwDNFz7vTsgEknSmpU1LnmjVrhhKymZltwpASwWY4OCL2I7kFdI6kQ0rWl2tz2OhHbBFxRUS0R0T7hAkTsojTzCy3Mk0EEbE6/fdZYAmwf0mVbmC3oueTgNVZxmRmZgNllggkbS9pbGEZOBJ4tKTaj4HT095DBwIvRMQfs4rJzMw2Vs3oo5trZ5JeR4XXuSEibpc0FyAiFgK3AscATwB/wXMhm5nVXGaJICKeBDYamC5NAIXlAM7JKgYzM9u0LK8IzKrS0dXDgqUrWd3bx8TWFubNmsrsGRt1HjOzjDgRWF11dPUwf/EK+tb1A9DT28f8xSsAnAzMaiTr7qNmg1qwdOWGJFDQt66fBUtX1ikis/xxIrC6Wt3bN6RyMxt+TgRWVxNbW4ZUbmbDz4nA6mrerKm0NDcNKGtpbmLerKl1isgsf9xYbHVVaBB2ryGz+nEisLqbPaPNH/xmdZSLROB+6mZmlY36ROB+6mZmgxv1jcXup25mNrhRnwjcT93MbHCjPhG4n7qZ2eBGfSJwP3Uzs8GN+sZi91M3MxvcqE8E4H7qZmaDGfW3hszMbHBOBGZmOedEYGaWc04EZmY550RgZpZzTgRmZjnnRGBmlnNOBGZmOedEYGaWc04EZmY550RgZpZzTgRmZjnnRGBmlnNOBGZmOZeLYagbSUdXj+dGMLOGkvkVgaQmSV2SbimzbqakFyQ9nD6+kHU89dTR1cP8xSvo6e0jgJ7ePuYvXkFHV0+9QzOzHKvFraFPAY8Nsv6XEfGO9PGlGsRTNwuWrqRvXf+Asr51/SxYurJOEZmZZZwIJE0CjgWuzPJ1RorVvX1DKjczq4WsrwguAT4D/G2QOgdJekTSbZL2LldB0pmSOiV1rlmzJpNAa2Fia8uQys3MaiGzRCDpOODZiHhokGrLgD0iYl/gUqCjXKWIuCIi2iOifcKECRlEWxvzZk2lpblpQFlLcxPzZk2tU0RmZtleERwMvE/SKuBG4HBJ3y+uEBEvRsTL6fKtQLOk8RnGVFezZ7Rx8YnTaGttQUBbawsXnzjNvYbMrK4UEdm/iDQTOC8ijisp3wX4fxERkvYHbia5QqgYVHt7e3R2dmYar5nZaCPpoYhoL7eu5r8jkDQXICIWAicDZ0laD/QBcwZLAmZmNvxqckUwnHxFYGY2dINdEXiICTOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzyzknAjOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzyzknAjOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzyzknAjOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzyzknAjOznHMiMDPLOScCM7OccyIwM8s5JwIzs5xzIjAzy7mt6x2AmZkNrqOrhwVLV7K6t4+JrS3MmzWV2TPahm3/mV8RSGqS1CXpljLrJOmbkp6QtFzSflnHY2Y2knR09TB/8Qp6evsIoKe3j/mLV9DR1TNsr1GLW0OfAh6rsO5o4C3p40zg8hrEY2Y2YixYupK+df0DyvrW9bNg6cphe41ME4GkScCxwJUVqpwAXBeJB4BWSbtmGZOZ2UiyurdvSOWbI+srgkuAzwB/q7C+DXim6Hl3WmZmZsDE1pYhlW+OzBKBpOOAZyPiocGqlSmLMvs6U1KnpM41a9YMW4xmZo1u3qyptDQ3DShraW5i3qypw/YaWV4RHAy8T9Iq4EbgcEnfL6nTDexW9HwSsLp0RxFxRUS0R0T7hAkTsorXzKzhzJ7RxsUnTqOttQUBba0tXHzitGHtNaSIjb6ADztJM4HzIuK4kvJjgXOBY4ADgG9GxP6D7au9vT06OzuzCtXMbFSS9FBEtJdbV/PfEUiaCxARC4FbSZLAE8BfgI/WOh4zs7yrSSKIiHuAe9LlhUXlAZxTixjMzKw8DzFhZpZzTgRmZjnnRGBmlnM16TU0nCStAZ7OYNfjgecy2O9wcozDYyTECCMjTsc4PGoR4x4RUbb//YhLBFmR1Fmpa1WjcIzDYyTECCMjTsc4POodo28NmZnlnBOBmVnOORG87op6B1AFxzg8RkKMMDLidIzDo64xuo3AzCznfEVgZpZzTgRmZjk36hOBpN0k/UzSY5J+I+lTZerMk/Rw+nhUUr+kcem6VZJWpOsyGfZU0hhJv5L0SBrjF8vUqTi/s6SjJK1M151fxxhPTWNbLul+SfsWrWuU4zhT0gtFf+8vFK1rlONY1/OxKI7Nmm+8Fsexyhjrej5WGWNdz8cNImJUP4Bdgf3S5bHA74C3D1L/eODuouergPEZxyhgh3S5GXgQOLCkzjHAbWndA4EH0/Im4PfAnsA2wCODvb+MY3w38MZ0+ehCjA12HGcCt5TZtmGOY73Px6LX+jfghgrHq67nY5Ux1vV8rDLGup6PhceovyKIiD9GxLJ0+SXgMQafDvMUYFEtYiuIxMvp0+b0UdqKX2l+5/2BJyLiyYj4K8kkQCfUI8aIuD8i/pw+fYBkoqGaqfI4VtIwx7FEzc9H2KL5xmtyHKuJsd7nI1R1HCup2XGEHNwaKiZpMjCD5FtYufXbAUcBPywqDuAOSQ9JOjPD2JokPQw8C/w0IkpjrDS/c83mfa4ixmL/RPKNsaBRjiPAQemtmdsk7Z2WNdxxrOf5yObPN17Lecg3FWOxupyPVBdjXc9HyFEikLQDyX+of4mIFytUOx64LyKeLyo7OCL2I7m0PEfSIVnEFxH9EfEOkm8t+0vap6RKpfmdq5r3eThUESMAkg4j+Y/32aLiRjmOy0jGXNkXuBToKIRdbnd1irGgLuejtmy+8ZocxypjLNSty/lYZYx1Px8hJ4lAUjNJErg+IhYPUnUOJZfhEbE6/fdZYAnJJVtmIqKXZBKfo0pWVZrfuap5n4fTIDEiaTrJZfAJEbG2aJuGOI4R8WLh1kxE3Ao0SxpPgx3HVL3Oxy2Zb7xWx7GaGOt9Pm4yxoY5H7NqfGiUB0lmvQ64ZBP13gA8D2xfVLY9MLZo+X7gqAxinAC0psstwC+B40rqHMvAxrlfpeVbA08CU3i9UWnvOsW4O8m0o+8uKW+k47gLr/+Qcn/gD+kxbZjjWO/zsSSOmZRvzKzr+VhljHU9H6uMsa7nY+FR8zmL6+Bg4DRgRXpfFuBzJCcJ8frUme8H7oiIV4q23RlYIgmSP8wNEXF7BjHuClwrqYnkKu2miLhFVczvHBHrJZ0LLCXpafDdiPhNnWL8ArAT8O30mK2PZETFRjqOJwNnSVoP9AFzIvlf2EjHEep7PpbVYOdjNTHW+3ysJsZ6n49JTGkmMjOznMpFG4GZmVXmRGBmlnNOBGZmOedEYGaWc04EZmY5l4fuo2YVSdoJuCt9ugvQD6xJn+8fyTgvhbqrgPaIeK6mQZplzInAci2SX5u+A0DShcDLEfH1ugZlVmO+NWRWQtIR6fjxKyR9V9K2RavnKZlP4FeS3pzWP17Sg+k2d0raOS3fQdLV6X6WSzopLT9K0rJ0oLG70rJxkjrSeg+kQyOY1YQTgdlAY4BrgA9FxDSSq+azita/GBH7A5eRjCwJcC/JnAIzSMaU+Uxa/u/ACxExLSKmA3dLmgB8BzgpkoHGPpDW/SLQldb7HMmwKGY14URgNlAT8FRE/C59fi1QPDLloqJ/D0qXJwFLJa0A5gGFoYTfC3yrsGEkY+MfCPwiIp5Kywoji74H+F5adjewk6Q3DOP7MqvIicBsoFc2sT7KLF8KXJZeQXyC5ML+2PUAAADTSURBVKoCksHDSsdwKVdWKB/stcwy40RgNtAYYHLh/j/JgIU/L1r/oaJ//zddfgPQky5/uKjuHcC5hSeS3phuc6ikKWnZuHT1L4BT07KZwHNRed4Ms2HlXkNmA71KMpLmf0vaGvg1sLBo/baSHiT5EnVKWnZhWr+HZErEKWn5l4FvSXqUpFvqFyNicToj1mJJW5HMUvb36T6ulrScZDTP4oRilimPPmpmlnO+NWRmlnNOBGZmOedEYGaWc04EZmY550RgZpZzTgRmZjnnRGBmlnP/H/KRaw4CkZmZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data_str = '''Region Alcohol Tobacco\n",
    "North 6.47 4.03\n",
    "Yorkshire 6.13 3.76\n",
    "Northeast 6.19 3.77\n",
    "East_Midlands 4.89 3.34\n",
    "West_Midlands 5.63 3.47\n",
    "East_Anglia 4.52 2.92\n",
    "Southeast 5.89 3.20\n",
    "Southwest 4.79 2.71\n",
    "Wales 5.27 3.53\n",
    "Scotland 6.08 4.51\n",
    "Northern_Ireland 4.02 4.56'''\n",
    "\n",
    "# Read in the data. Note that for Python 2.x, you have to change the \"import\" statement\n",
    "from io import StringIO\n",
    "df = pd.read_csv(StringIO(data_str), sep=r'\\s+')\n",
    "\n",
    "# Plot the data\n",
    "df.plot('Tobacco', 'Alcohol', style='o')\n",
    "plt.ylabel('Alcohol')\n",
    "plt.title('Sales in Several UK Regions')\n",
    "#plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Show Regression Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                Alcohol   R-squared:                       0.615\n",
      "Model:                            OLS   Adj. R-squared:                  0.567\n",
      "Method:                 Least Squares   F-statistic:                     12.78\n",
      "Date:                Tue, 14 Apr 2020   Prob (F-statistic):            0.00723\n",
      "Time:                        18:26:27   Log-Likelihood:                -4.9998\n",
      "No. Observations:                  10   AIC:                             14.00\n",
      "Df Residuals:                       8   BIC:                             14.60\n",
      "Df Model:                           1                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "Intercept      2.0412      1.001      2.038      0.076      -0.268       4.350\n",
      "Tobacco        1.0059      0.281      3.576      0.007       0.357       1.655\n",
      "==============================================================================\n",
      "Omnibus:                        2.542   Durbin-Watson:                   1.975\n",
      "Prob(Omnibus):                  0.281   Jarque-Bera (JB):                0.904\n",
      "Skew:                          -0.014   Prob(JB):                        0.636\n",
      "Kurtosis:                       1.527   Cond. No.                         27.2\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "result = sm.ols('Alcohol ~ Tobacco', df[:-1]).fit()\n",
    "print(result.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Model Parameters"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### F-Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F-statistic: 12.785,  p-value: 0.00723\n"
     ]
    }
   ],
   "source": [
    "N = result.nobs\n",
    "k = result.df_model+1\n",
    "dfm, dfe = k-1, N - k\n",
    "F = result.mse_model / result.mse_resid\n",
    "p = 1.0 - stats.f.cdf(F,dfm,dfe)\n",
    "print('F-statistic: {:.3f},  p-value: {:.5f}'.format( F, p ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Likelihood"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ln(L) = -4.999758697385978\n"
     ]
    }
   ],
   "source": [
    "N = result.nobs\n",
    "SSR = result.ssr\n",
    "s2 = SSR / N\n",
    "L = ( 1.0/np.sqrt(2*np.pi*s2) ) ** N * np.exp( -SSR/(s2*2.0) )\n",
    "print('ln(L) =', np.log( L ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Coefficients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Intercept    2.041223\n",
      "Tobacco      1.005896\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(result.params)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Standard Error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "# Standard Errors\n",
    "df['Eins'] = np.ones(( len(df), ))\n",
    "Y = df.Alcohol[:-1]\n",
    "X = df[['Tobacco','Eins']][:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.00136021        nan]\n",
      " [       nan 0.28132158]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Programs\\WPy64-3760\\python-3.7.6.amd64\\lib\\site-packages\\ipykernel_launcher.py:17: RuntimeWarning: invalid value encountered in sqrt\n"
     ]
    }
   ],
   "source": [
    "X = df.Tobacco[:-1]\n",
    "\n",
    "# add a column of ones for the constant intercept term\n",
    "X = np.vstack(( np.ones(X.size), X ))\n",
    "\n",
    "# convert the NumPy arrray to matrix\n",
    "X = np.matrix( X )\n",
    "\n",
    "# perform the matrix multiplication,\n",
    "# and then take the inverse\n",
    "C = np.linalg.inv( X * X.T )\n",
    "\n",
    "# multiply by the MSE of the residual\n",
    "C *= result.mse_resid\n",
    "\n",
    "# take the square root\n",
    "SE = np.sqrt(C)\n",
    "\n",
    "print(SE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### T-statistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "t = 3.5756084542390836\n"
     ]
    }
   ],
   "source": [
    "ii = 1\n",
    "beta = result.params[ii]\n",
    "se = SE[ii,ii]\n",
    "t = beta / se\n",
    "print('t =', t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p = 0.007\n"
     ]
    }
   ],
   "source": [
    "N = result.nobs\n",
    "k = result.df_model + 1\n",
    "dof = N - k\n",
    "p_onesided = 1.0 - stats.t( dof ).cdf( t )\n",
    "p = p_onesided * 2.0\n",
    "print('p = {0:.3f}'.format(p))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Confidence Intervals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.26791770937096127 4.350363883047379\n"
     ]
    }
   ],
   "source": [
    "ii = 0\n",
    "\n",
    "# the estimated coefficient, and its variance\n",
    "beta, c = result.params[ii], SE[ii,ii]\n",
    "\n",
    "# critical value of the t-statistic\n",
    "N = result.nobs\n",
    "P = result.df_model\n",
    "dof = N - P - 1\n",
    "z = stats.t( dof ).ppf(0.975)\n",
    "\n",
    "# the confidence interval\n",
    "print(beta - z * c, beta + z * c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Analysis of Residuals"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Skew and Kurtosis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Skewness: -0.014,  Kurtosis: 1.527\n"
     ]
    }
   ],
   "source": [
    "d = Y - result.fittedvalues\n",
    "S = np.mean( d**3.0 ) / np.mean( d**2.0 )**(3.0/2.0)\n",
    "K = np.mean( d**4.0 ) / np.mean( d**2.0 )**(4.0/2.0)\n",
    "print('Skewness: {:.3f},  Kurtosis: {:.3f}'.format( S, K ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Omnibus Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Omnibus: 2.541898169064917\n",
      "Pr( Omnibus ) = 0.2805652152710698\n",
      "Omnibus: 2.5418981690649174, p = 0.28056521527106976\n"
     ]
    }
   ],
   "source": [
    "def Z1( s, n ):\n",
    "    Y = s * np.sqrt( ( ( n + 1 )*( n + 3 ) ) / ( 6.0 * ( n - 2.0 ) ) )\n",
    "    b = 3.0 * ( n**2.0 + 27.0*n - 70 )*( n + 1.0 )*( n + 3.0 )\n",
    "    b /= ( n - 2.0 )*( n + 5.0 )*( n + 7.0 )*( n + 9.0 )\n",
    "    W2 = - 1.0 + np.sqrt( 2.0 * ( b - 1.0 ) )\n",
    "    alpha = np.sqrt( 2.0 / ( W2 - 1.0 ) )\n",
    "    z = 1.0 / np.sqrt( np.log( np.sqrt( W2 ) ) )\n",
    "    z *= np.log( Y / alpha + np.sqrt( ( Y / alpha )**2.0 + 1.0 ) )\n",
    "    return z\n",
    "\n",
    "def Z2( k, n ):\n",
    "    E = 3.0 * ( n - 1.0 ) / ( n + 1.0 )\n",
    "    v = 24.0 * n * ( n - 2.0 )*( n - 3.0 )\n",
    "    v /= ( n + 1.0 )**2.0*( n + 3.0 )*( n + 5.0 )\n",
    "    X = ( k - E ) / np.sqrt( v )\n",
    "    b = ( 6.0 * ( n**2.0 - 5.0*n + 2.0 ) ) / ( ( n + 7.0 )*( n + 9.0 ) )\n",
    "    b *= np.sqrt( ( 6.0 * ( n + 3.0 )*( n + 5.0 ) ) / ( n * ( n - 2.0 )*( n - 3.0 ) ) )\n",
    "    A = 6.0 + ( 8.0 / b )*( 2.0 / b + np.sqrt( 1.0 + 4.0 / b**2.0 ) )\n",
    "    z = ( 1.0 - 2.0 / A ) / ( 1.0 + X * np.sqrt( 2.0 / ( A - 4.0 ) ) )\n",
    "    z = ( 1.0 - 2.0 / ( 9.0 * A ) ) - z**(1.0/3.0)\n",
    "    z /= np.sqrt( 2.0 / ( 9.0 * A ) )\n",
    "    return z\n",
    "\n",
    "K2 = Z1( S, N )**2.0 + Z2( K, N )**2.0\n",
    "print('Omnibus: {}'.format( K2))\n",
    "\n",
    "p = 1.0 - stats.chi2(2).cdf( K2 )\n",
    "print('Pr( Omnibus ) = {}'.format( p ))\n",
    "\n",
    "(K2, p) = stats.normaltest(result.resid)\n",
    "print('Omnibus: {0}, p = {1}'.format(K2, p))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Durbin Watson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Durbin-Watson: 1.97535\n"
     ]
    }
   ],
   "source": [
    "DW = np.sum( np.diff( result.resid.values )**2.0 ) / result.ssr\n",
    "print('Durbin-Watson: {:.5f}'.format( DW ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Jarque-Bera Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "JB-statistic: 0.90421,  p-value: 0.63629\n"
     ]
    }
   ],
   "source": [
    "JB = (N/6.0) * ( S**2.0 + (1.0/4.0)*( K - 3.0 )**2.0 )\n",
    "p = 1.0 - stats.chi2(2).cdf(JB)\n",
    "print('JB-statistic: {:.5f},  p-value: {:.5f}'.format( JB, p ))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Condition Number"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([136.51527115,   0.18412885]), matrix([[ 0.96332746, -0.26832855],\n",
      "        [ 0.26832855,  0.96332746]]))\n"
     ]
    }
   ],
   "source": [
    "X = df.Tobacco[:-1]\n",
    " \n",
    "# add a column of ones for the constant intercept term\n",
    "X = np.vstack(( X, np.ones( X.size ) ))\n",
    " \n",
    "# convert the NumPy arrray to matrix\n",
    "X = np.matrix( X )\n",
    "EV = np.linalg.eig( X * X.T )\n",
    "print(EV)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Condition No.: 27.22887\n"
     ]
    }
   ],
   "source": [
    "CN = np.sqrt( EV[0].max() / EV[0].min() )\n",
    "print('Condition No.: {:.5f}'.format( CN ))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
